* Root locations: every path used in this do-file hangs off these two globals.
global projectdir "~"
global datadir "$projectdir/data"


* Start from an empty session and work out of the intermediate-files folder.
clear
cd $datadir/intermediate_files
*use br_ein_firmid_xwalk, clear
*use br_ein_firmid_xwalk_dedup, clear


* IPEDS EINs
* Load only the ein column, reduce it to one row per distinct EIN, and park
* the list in a tempfile. The year loop further down merges it on ein to
* build the univ_ein indicator.
use ein using organize_ipeds_ein, clear
duplicates drop
tempfile univeins
save `univeins', replace


* ------------------------------------------------------------------
* Stack the W-2 records for tax years 2005-2017 into one dataset,
* attaching firm identifiers and firm characteristics to every
* pik-ein record along the way.
* ------------------------------------------------------------------

* Seed the accumulator with an empty dataset so the append at the end of
* the first iteration has something to read.
clear
gen year=.
* The tempfile is declared ONCE here and overwritten on every iteration.
* Re-declaring it inside the loop would leave one ever-growing cumulative
* copy per year on disk until the do-file finishes.
tempfile stackyears
save `stackyears', replace


forvalues i = 2005/2017 {

	display in red "--------- Year `i' ----------"

	* Load the summary records plus the flagged-duplicates file for this
	* year and keep only the listed dup_flag codes.
	cd $datadir/raw_pulls/w2_usiris
	use iw2ty`i'_summaryrecs_usiris_2018q4, clear
	append using iw2ty`i'_dups_flagged_usiris_2018q4
	keep if inlist(dup_flag, "N1", "AX", "X1", "D1", "FX", "TX")
	keep pik ein wage_tip year
	
	
	*Merge in firmid crosswalk, first from the lehd
	display "--Add LEHD Data--"
	cd $datadir/intermediate_files
	merge m:1 year ein using lehd_ein_firm_age_size_naics,update
	drop if _merge == 2
	drop _merge
	
	* Blank out firmid_filled values whose first character is S.
	* NOTE(review): presumably these are placeholder ids - confirm.
	replace firmid_filled = "" if substr(firmid_filled, 1, 1)=="S"
	
	* Set the same-year firm age aside so each adjacent-year merge below
	* brings in a fresh firmage, and remember the true year.
	rename firmage firmage_hold
	gen year_hold = year
	
	*try adjacent years
	* year is temporarily pointed at a neighbouring year so the year-ein
	* merge picks up that year's LEHD row. With the update option only
	* values still missing in memory are filled.
	foreach off_set of numlist 1 -1 2 -2 {
		local off_set_year = `i' + `off_set'
		replace year = `off_set_year' if year_hold==`i'
		merge m:1 year ein using lehd_ein_firm_age_size_naics,update
		drop if _merge == 2
		drop _merge
		
		replace firmid_filled = "" if substr(firmid_filled, 1, 1)=="S"
		* Shift the neighbouring year's age back onto the current year.
		replace firmage_hold = firmage - `off_set' if missing(firmage_hold)
		drop firmage

		replace year = year_hold
	}
	rename firmage_hold firmage
	
	* LEHD firm id, falling back to firmid_filled when firmid is missing.
	rename firmid firmid_lehd
	replace firmid_lehd = firmid_filled if missing(firmid_lehd)
	
	rename firmage firmage_lehd
	rename firmsize firmsize_lehd
	
	drop quarter fas_ein lbd_match firmid_filled year_hold
	
	*Add in LBD firmids
	display "--Add LBD--"
	
	* Same-year crosswalk first, then neighbouring years. Each merge sits
	* inside capture noisily, so an error in one offset year (the message
	* is still printed) does not stop the run.
	foreach off_set of numlist 0 1 -1 2 -2 {
		local off_set_year = `i' + `off_set'
		capture noisily {
			merge m:1 ein using ./BR/w2_ssl_`off_set_year'_usiris_2018q4_dedup, update
			drop if _merge == 2
			drop _merge
		}
	}
	* firmid at this point is the one brought in by the crosswalk merges
	* above (the LEHD one was renamed to firmid_lehd earlier).
	rename firmid firmid_lbd_xwalk

	* Final firm id: LEHD id, else crosswalk id, else "0" followed by the EIN.
	gen firmid = firmid_lehd
	replace firmid = firmid_lbd_xwalk if missing(firmid)
	replace firmid = "0" + ein if missing(firmid)
	drop firmid_lbd_xwalk
	
	* IPEDS EINs
	* univ_ein is 1 when the EIN appears in the IPEDS list, 0 otherwise.
	display "--Add Uni Data--"
	merge m:1 ein using `univeins'
	drop if _merge == 2
	gen univ_ein = (_merge == 3)
	drop _merge
	
	destring wage_tip, replace
	
	* Within each pik-firmid pair, rows are ordered by descending wage and
	* the NAICS codes of the preceding row are copied down the group.
	gsort pik firmid -wage_tip 
	replace mode_naics2007fnl_emp=mode_naics2007fnl_emp[_n-1] if pik==pik[_n-1] & firmid==firmid[_n-1] & wage_tip<=wage_tip[_n-1]
	replace mode_naics2012fnl_emp=mode_naics2012fnl_emp[_n-1] if pik==pik[_n-1] & firmid==firmid[_n-1] & wage_tip<=wage_tip[_n-1]	
	
	*Don't make this into a firm-level file yet
	*collapse (sum) wage_tip (max) univ_ein firmage_lehd firmsize_lehd, by(pik firmid year  mode_naics2007fnl_emp mode_naics2012fnl_emp) fast

	*Add in other firm var from lbd
	display "--Add LBD Frims--"
	gen f_naics12_6 = ""
	gen lbd_estabs_t = .
	gen lbd_emp_t = .
	gen lbd_pay_t = .
	gen firmage_hold = .
	
	* Same year first, then neighbouring years; earlier offsets win because
	* every replace below only touches values that are still missing.
	foreach off_set of numlist 0 1 -1 2 -2 {
		local off_set_year = `i' + `off_set'
		
		* EIN-level NAICS code for the offset year.
		capture noisily {
			merge m:1 ein using ./BR/ein_naics_`off_set_year'.dta, update
			drop if _merge==2
			drop _merge
			replace f_naics12_6 = f_fk_naics12_6 if missing(f_naics12_6)
			drop f_fk_naics12_6	
		}
		
		capture noisily {
			*LBD data year 2016 firms data
			merge m:1 firmid using $datadir/raw_pulls/lbd_firms/firm_`off_set_year'_emp_c201600.dta, keepusing(estabs emp pay f_lastyear firmage) update
			drop if _merge==2
			drop _merge
			
			replace lbd_estabs_t = estabs if missing(lbd_estabs_t)
			replace lbd_emp_t = emp if missing(lbd_emp_t)
			replace lbd_pay_t = pay if missing(lbd_pay_t)
			* Shift the offset year's age back onto the current year.
			replace firmage_hold = firmage - `off_set' if missing(firmage_hold)
			
			drop estabs emp pay firmage
		}
	}
	
	rename firmage_hold firmage_lbd
		
	
	* Add the years processed so far and overwrite the single accumulator file.
	append using `stackyears'
	save `stackyears', replace

}

*Fill in data from other years
display "--Fill in Data--"

* How the fills work: replace walks the observations in order, so a fill
* from the previous row carries down a whole run in a single pass, while a
* fill from the next row only moves up one row per pass. The passes are
* therefore repeated a fixed number of times.

* ---- Worker-firm level: neighbours must share pik AND firmid ----
sort pik firmid year

forvalues pass = 0/15 {
	* Attributes copied unchanged from the previous row, then from the next row.
	foreach v of varlist f_naics12_6 mode_naics2007fnl_emp mode_naics2012fnl_emp f_lastyear {
		replace `v' = `v'[_n-1] if missing(`v') & pik==pik[_n-1] & firmid==firmid[_n-1]
		replace `v' = `v'[_n+1] if missing(`v') & pik==pik[_n+1] & firmid==firmid[_n+1]
	}

	* Firm age: copy as-is from a neighbour in the same year, or shift by one
	* when the neighbour is exactly one year earlier or later.
	foreach v of varlist firmage_lehd firmage_lbd {
		replace `v' = `v'[_n-1]   if missing(`v') & pik==pik[_n-1] & firmid==firmid[_n-1] & year==year[_n-1]
		replace `v' = `v'[_n+1]   if missing(`v') & pik==pik[_n+1] & firmid==firmid[_n+1] & year==year[_n+1]
		replace `v' = `v'[_n-1]+1 if missing(`v') & pik==pik[_n-1] & firmid==firmid[_n-1] & year==year[_n-1]+1
		replace `v' = `v'[_n+1]-1 if missing(`v') & pik==pik[_n+1] & firmid==firmid[_n+1] & year==year[_n+1]-1
	}
}


* ---- Firm level: neighbours only need to share firmid ----
sort firmid year wage_tip

forvalues pass = 0/50 {
	* Fix: the next-row fill of f_naics12_6 used to also require a matching
	* pik, unlike its previous-row counterpart and every other firm-level
	* fill here. All four attributes now match on firmid only.
	foreach v of varlist f_naics12_6 mode_naics2007fnl_emp mode_naics2012fnl_emp f_lastyear {
		replace `v' = `v'[_n-1] if missing(`v') & firmid==firmid[_n-1]
		replace `v' = `v'[_n+1] if missing(`v') & firmid==firmid[_n+1]
	}

	* NOTE(review): the firm-age fills below still require a matching pik
	* even though the data are now ordered by firm. That is how the original
	* was written and it is kept unchanged; confirm whether firm-level
	* matching was intended here as well.
	foreach v of varlist firmage_lehd firmage_lbd {
		*same year
		replace `v' = `v'[_n-1]   if missing(`v') & pik==pik[_n-1] & firmid==firmid[_n-1] & year==year[_n-1]
		replace `v' = `v'[_n+1]   if missing(`v') & pik==pik[_n+1] & firmid==firmid[_n+1] & year==year[_n+1]
		*diff year
		replace `v' = `v'[_n-1]+1 if missing(`v') & pik==pik[_n-1] & firmid==firmid[_n-1] & year==year[_n-1]+1
		replace `v' = `v'[_n+1]-1 if missing(`v') & pik==pik[_n+1] & firmid==firmid[_n+1] & year==year[_n+1]-1
	}
}

* Preferred firm age: LEHD value, falling back to the LBD value.
gen firmage = firmage_lehd
replace firmage = firmage_lbd if missing(firmage)

* Preferred 2012 NAICS: the LEHD mode code, falling back to the EIN-level code.
gen naics2012 = mode_naics2012fnl_emp
replace naics2012 = f_naics12_6 if missing(naics2012)

* Park the filled-in stack for the join that follows.
tempfile stackyears
save `stackyears', replace




************************************************************
************************************************************

display "--------------Put Together--------------"

* Load the employee-id to pik crosswalk, keeping only the two columns that
* are needed and only the rows that actually carry a pik.
cd $datadir/raw_pulls/umetrics/FSRDC_2018
use emplid pik using usiris_emnmxwalk_2018q4a_2018q4a, clear
keep if !missing(pik)

* Pair every crosswalk row with every stacked W-2 row sharing its pik.
* unmatched(both) also keeps rows found on one side only, and the _merge
* variable it creates equals 3 for the rows matched on both sides.
cd $datadir/intermediate_files
joinby pik using `stackyears', unmatched(both)
generate flag_emplid_w2 = (_merge == 3)
drop _merge
rename emplid employeeid

* Identifier columns first, rows ordered the same way.
order employeeid pik ein firmid year 
sort employeeid pik firmid year

* Shrink storage types, then write the result into intermediate_files
* (still the working directory).
compress
save iris_w2_jobhistory_2018q4, replace




